In [1]:
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import holoviews as hl
%load_ext holoviews.ipython
import sklearn.metrics


Using gpu device 1: Tesla K40c
:0: FutureWarning: IPython widgets are experimental and may change in the future.
Welcome to the HoloViews IPython extension! (http://ioam.github.io/holoviews/)
Available magics: %compositor, %opts, %params, %view, %%labels, %%opts, %%view

This is the model without any constraints on the kernel or col norms. We want to see what value col_norms_mean settles to, so that we can set these constraints at 80% of that value.


In [135]:
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/quicker_learning_1_fc_layer_experiment_no_norms_repeat_recent.pkl")

In [67]:
def make_curves(model, *args):
    # Overlay the monitoring curves for the given channel names.
    curves = None
    for c in args:
        channel = model.monitor.channels[c]
        # Capitalise the channel name to use as the group label.
        c = c[0].upper() + c[1:]
        if curves is None:
            curves = hl.Curve(zip(channel.example_record,channel.val_record),group=c)
        else:
            curves += hl.Curve(zip(channel.example_record,channel.val_record),group=c)
    return curves

Plot all the col norm means in the fully connected and softmax composite layers.


In [68]:
means = [c for c in sorted(m.monitor.channels.keys()) if "mean" in c and "norm" in c and "col" in c]
make_curves(m,*means)


Out[68]:

Plot all the kernel norm means in the convolutional layers.


In [69]:
means = [c for c in sorted(m.monitor.channels.keys()) if "mean" in c and "norm" in c and "kernel" in c]
make_curves(m,*means)


Out[69]:

We think that the first (and, in this model, the only) fully connected layer is indicative of the value we're after.


In [136]:
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)


Out[136]:
[plot: train_h4_col_norms max, min and mean against epoch]

An attempt to find the saturating value of the mean by fitting a parabola to it and taking its extremum (the minimum here, since the col norms are decreasing; the maximum for the increasing kernel norms below).


In [137]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = min(p(range(200)))
print(sat)


1.8264633639

In [138]:
sat * 0.8


Out[138]:
1.4611706911165407

That's the 80% value to use for the col norm constraint.
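For reference, this is roughly where that value would be plugged in when rebuilding the model. A minimal sketch, assuming h4 is a pylearn2 RectifiedLinear layer; the dim and irange here are placeholders, not values from the actual YAML:

import pylearn2.models.mlp

# Hypothetical reconstruction of the fully connected layer with its
# col norms constrained to 80% of the observed saturating value.
h4 = pylearn2.models.mlp.RectifiedLinear(
    dim=1024,             # placeholder, not the real layer width
    layer_name='h4',
    irange=0.005,         # placeholder initialisation range
    max_col_norm=1.46,    # 80% of the ~1.83 saturating col_norms_mean
)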


In [139]:
channel = m.monitor.channels["train_h1_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h1_kernel_norms_mean"]
plt.plot(channel.val_record)


Out[139]:
[plot: train_h1_kernel_norms max, min and mean]

In [140]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat


Out[140]:
0.69939012883976481

In [141]:
sat * 0.8


Out[141]:
0.55951210307181187

80% of the saturating layer 1 kernel_norms_mean
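The kernel norm constraint would go in the same way on the convolutional layers. A sketch, assuming h1 is a pylearn2 ConvRectifiedLinear layer; the shapes and irange are placeholders, not the real layer configuration:

import pylearn2.models.mlp

# Hypothetical reconstruction of the first conv layer with its kernel
# norms constrained to 80% of the observed saturating value.
h1 = pylearn2.models.mlp.ConvRectifiedLinear(
    output_channels=48,    # placeholder
    kernel_shape=[5, 5],   # placeholder
    pool_shape=[2, 2],     # placeholder
    pool_stride=[2, 2],    # placeholder
    layer_name='h1',
    irange=0.005,          # placeholder
    max_kernel_norm=0.56,  # 80% of the ~0.70 saturating kernel_norms_mean
)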


In [142]:
channel = m.monitor.channels["train_h2_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_mean"]
plt.plot(channel.val_record)


Out[142]:
[plot: train_h2_kernel_norms max, min and mean]

In [143]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat


Out[143]:
0.81916423981245789

In [144]:
sat * 0.8


Out[144]:
0.65533139184996636

80% of the saturating layer 2 kernel_norms_mean


In [145]:
channel = m.monitor.channels["train_h3_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_min"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_mean"]
plt.plot(channel.val_record)


Out[145]:
[plot: train_h3_kernel_norms max, min and mean]

In [146]:
z = np.polyfit(channel.epoch_record, channel.val_record, 2)
p = np.poly1d(z)
sat = max(p(range(200)))
sat


Out[146]:
1.221272369224129

In [147]:
sat * 0.8


Out[147]:
0.97701789537930317

80% of the saturating layer 3 kernel_norms_mean
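Since the same polyfit recipe is repeated for every layer, it could be wrapped in a small helper; a sketch using only the channel attributes already used above (use_min picks the parabola's minimum for the decreasing col norms, its maximum for the increasing kernel norms):

def norm_constraint(channel, use_min=False, horizon=200, frac=0.8):
    # Fit a parabola to the monitored norm means, take its extremum
    # over the first `horizon` epochs as the saturating value, and
    # propose `frac` of that as the constraint.
    p = np.poly1d(np.polyfit(channel.epoch_record, channel.val_record, 2))
    vals = p(np.arange(horizon))
    sat = vals.min() if use_min else vals.max()
    return frac * sat

# e.g. norm_constraint(m.monitor.channels["train_h3_kernel_norms_mean"])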

An attempt at setting the weight norm constraints with Gavin. It went wrong.


In [186]:
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")

In [117]:
import neukrill_net.plotting as pl
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")


Out[117]:

In [113]:
%%opts HeatMap style(cmap='gray')
pl.model_weights(m)


Out[113]:

In [120]:
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")


Out[120]:

In [119]:
reload(pl)


Out[119]:
<module 'neukrill_net.plotting' from '/afs/inf.ed.ac.uk/user/s13/s1320903/Neuroglycerin/neukrill-net-tools/neukrill_net/plotting.py'>

An attempt at setting the weight norm constraints with Matt. It looks like it hasn't broken yet!


In [184]:
m = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_recent.pkl")

In [151]:
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")


Out[151]:

In [152]:
pl.monitor_channels(m, [c for c in m.monitor.channels if "norm" in c and "max" in c], x_axis = "epoch")


Out[152]:

In [134]:
channel = m.monitor.channels["train_h4_col_norms_max"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.epoch_record, channel.val_record)
channel = m.monitor.channels["train_h4_col_norms_mean"]
plt.plot(channel.epoch_record, channel.val_record)


Out[134]:
[plot: train_h4_col_norms max, min and mean against epoch]

The same model with dropout set to 0.9.
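For context, this is roughly how that would be configured; a sketch assuming dropout is applied through pylearn2's Dropout cost on the h4 layer (setting the include probability and scale to 1, as in the augmentation model below, disables dropout):

from pylearn2.costs.mlp.dropout import Dropout

# Hypothetical sketch: keep each input unit to h4 with probability 0.9
# and rescale at training time to compensate.
cost = Dropout(
    input_include_probs={'h4': 0.9},
    input_scales={'h4': 1.0 / 0.9},
)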


In [183]:
m_drop = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_dropout_recent.pkl")

In [163]:
pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["train_y_y_1_nll"], x_axis = "epoch")


Out[163]:

Compare how fast the validation NLL of the original and the dropout models is going down.


In [185]:
pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m_drop, ["valid_y_y_1_nll"], x_axis = "epoch")


Out[185]:

The model with more augmentations and no dropout (include probability set to 1).


In [175]:
m_aug = pylearn2.utils.serial.load(
    "/disk/scratch/neuroglycerin/models/experiment_setting_colnorms_aug_recent.pkl")

In [179]:
pl.monitor_channels(m_aug, ["valid_y_y_1_nll"], x_axis = "epoch") + pl.monitor_channels(m, ["valid_y_y_1_nll"], x_axis = "epoch")


Out[179]:

In [182]:
pl.monitor_channels(m_aug, [c for c in m_aug.monitor.channels if "norms_mean" in c], x_axis = "epoch")


Out[182]: